import  requests
import time
import lxml
from  bs4 import BeautifulSoup
# The three core crawler libraries: requests, BeautifulSoup, lxml
header= {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'}
#result=requests.get("http://www.560315.com/Transaction/MatchCapacityTransactionList",header)
#soup= BeautifulSoup(result.text, 'html.parser')
#/resulttext=soup.find_all("tr","y_listtable");
#td = soup.select('div.y_main > div > div > div.y_more_left_txt > div.y_more_left_txt > table > tr > td:nth-of-type(1) > a ')
#td1 = soup.select('div.y_main > div > div > div.y_more_left_txt > div.y_more_left_txt > table > tr > td:nth-of-type(2) > a ')
#td2 = soup.select('div.y_main > div > div > div.y_more_left_txt > div.y_more_left_txt > table > tr > td:nth-of-type(3)')

#for a,b,c in zip(td,td1,td2):
#    print(a.get_text()+"-"+b.get_text()+"-"+c.get_text())


#con2_newtab_2 > div > table > tbody > tr:nth-child(2) > td:nth-child(1) > a
#body > section > div.y_main > div > div > div.y_more_left_txt > div.y_more_left_txt > table > tbody > tr:nth-child(2) > td:nth-child(1) > a
#body > section > div.y_main > div > div > div.y_more_left_txt > div.y_more_left_txt > table > tbody > tr:nth-child(2) > td:nth-child(1) > a  浏览器的selector 方式
#> td:nth-of-type > a
#eg1
# Determine host gender from the avatar icon's CSS class (original comment wrongly said "age")
def judment_sex(class_name):
    """Map a member avatar's CSS class list to a gender string.

    The listing page marks female hosts with the ``member_icol`` class;
    any other class list (or ``None``) is treated as male.
    """
    return "女" if class_name == ['member_icol'] else "男"
# Collect every detail-page URL from one search-result page and scrape each
def get_links(url):
    """Fetch one search-result page and scrape every detail link on it.

    url: absolute URL of a xiaozhu.com search-result (listing) page.
    Side effect: calls get_info() once per detail link, which prints data.
    """
    # FIX: the original called requests.get(url, header), which passes the
    # UA dict as the second positional argument (`params`, i.e. the query
    # string) — the User-Agent header was never sent. Use the keyword.
    wd_data = requests.get(url, headers=header)
    soup = BeautifulSoup(wd_data.text, 'lxml')
    links = soup.select('#page_list > ul > li > a')
    for link in links:
        get_info(link.get("href"))

def get_info(url):
    """Fetch one listing detail page and print its scraped fields.

    url: absolute URL of a xiaozhu.com listing detail page.
    Side effect: prints one dict (title/address/price/img/name/sex) per
    listing card found on the page.
    """
    # FIX: pass the UA dict via the `headers` keyword; the original
    # positional call sent it as query-string params instead.
    wb_data = requests.get(url, headers=header)
    soup = BeautifulSoup(wb_data.text, 'lxml')
    titles = soup.select('div.pho_info > h4')
    addresses = soup.select('span.pr5')
    prices = soup.select('#pricePart > div.day_l > span')
    imgs = soup.select('#floatRightBox > div.js_box.clearfix > div.member_pic > a > img')
    names = soup.select('#floatRightBox > div.js_box.clearfix > div.w_240 > h6 > a')
    sexs = soup.select('#floatRightBox > div.js_box.clearfix > div.member_pic > div')
    for title, address, price, img, name, sex in zip(titles, addresses, prices, imgs, names, sexs):
        # try/except is per item so one malformed card no longer aborts the
        # whole page; the exception type is narrowed from bare Exception.
        try:
            data = {
                'title': title.get_text().strip(),
                'address': address.get_text().strip(),
                # FIX: the original called the nonexistent get_get_text(),
                # which raised AttributeError on every page.
                'price': price.get_text().strip(),
                'img': img.get('src'),
                'name': name.get_text(),
                'sex': judment_sex(sex.get("class")),
                }
        except AttributeError as e:
            print(e)
        else:
            print(data)


# Crawl search-result pages 1 through 13 of Beijing short-term rentals.
page_template = 'http://bj.xiaozhu.com/search-duanzufang-p{}-0/'
urls = [page_template.format(page_no) for page_no in range(1, 14)]
print(urls)
for page_url in urls:
    get_links(page_url)
    # time.sleep(2)  # uncomment to throttle requests between pages







